Report Assessment Ch5 Subsidies Reform

Data Management Report

Author
Affiliation

Rainer M. Krug

Doi
Abstract

A short description of what this is about. This is not a traditional abstract, but rather something else …

Working Title

IPBES_TCA_Ch5_Subsidies_Reform

Code repo

Github - private

Build No: 98

%The BuildNo is automatically increased by one each time the report is rendered. It is used to indicate different renderings when the version stays the same%.

Introduction

The following steps are documented in this report:

Methods

The search terms are based on the shared google doc. They are cleaned up for the usage in OpenAlex.

R1 count from OpenAlex

The search term is R1

Show the code
#|

# Total OpenAlex hit count over all R1 search terms. Each term is queried
# on its own (count_only), so works matching several terms are counted
# more than once.
R1_count <- lapply(
    R1_st,
    function(term) {
        openalexR::oa_fetch(
            title_and_abstract.search = compact(term),
            count_only = TRUE,
            verbose = TRUE
        )[, "count"]
    }
) |>
    unlist() |>
    sum()

R2 count from OpenAlex

The search term is R2

Show the code
#|

# Total OpenAlex hit count for the single combined R2 search term.
R2_count <- openalexR::oa_fetch(
    count_only = TRUE,
    verbose = TRUE,
    title_and_abstract.search = compact(R2_st)
)[, "count"]

R1 AND R2 count from OpenAlex

The search term is R1 AND R2

Show the code
#|

# Hit count for "(R1 term) AND (R2)" per R1 term, summed. As above, the
# per-term sums can double-count works matching more than one R1 term.
R1_R2_count <- lapply(
    R1_st,
    function(term) {
        query <- paste0("(", term, ") AND (", R2_st, ")")
        openalexR::oa_fetch(
            title_and_abstract.search = compact(query),
            count_only = TRUE,
            output = "list",
            verbose = TRUE
        )$count
    }
) |>
    unlist() |>
    sum()

Download R1 AND R2 Corpus

The corpus download will be stored in data/pages and the arrow database in data/corpus.

This is not on github!

The corpus can be read by running read_corpus() which opens the database so that then it can be fed into a dplyr pipeline. After most dplyr functions, the actual data needs to be collected via dplyr::collect().

Only then is the actual data read!

Needs to be enabled by setting eval: true in the code block below.

Show the code
#|

tic()

#' Download all OpenAlex works matching `search` as paged .rds files.
#'
#' The download is chunked by publication year (obtained via a group_by
#' query) so each chunk can be fetched and, in principle, resumed
#' independently. Years are processed in random order on 8 forked workers.
#'
#' @param search Search string passed to `title_and_abstract.search`.
#' @param pages_dir Directory under which the per-year page directories are
#'   created (created recursively if missing).
#' @param rds_dir Prefix for the per-year subdirectory names; the year is
#'   appended, e.g. "pages_publication_year=2020".
#' @return Invisibly, the unlisted per-year download results.
get_corpus_pages <- function(search, pages_dir, rds_dir = "pages_publication_year=") {
    dir.create(
        path = pages_dir,
        showWarnings = FALSE,
        recursive = TRUE
    )

    # One group_by query yields the publication years ($key) that actually
    # occur for this search -- these define the download chunks.
    years <- oa_fetch(
        title_and_abstract.search = compact(search),
        group_by = "publication_year",
        paging = "cursor",
        verbose = FALSE
    )$key

    #######
    #######
    # Disabled resume logic: would skip years whose directory exists and has
    # no leftover next_page.rds marker (i.e. completed downloads).
    # processed <- list.dirs(
    #     path = pages_dir,
    #     full.names = FALSE,
    #     recursive = FALSE
    # ) |>
    #     gsub(
    #         pattern = paste0("^pages_publication_year=", ""),
    #         replacement = ""
    #     )

    # interrupted <- list.files(
    #     path = pages_dir,
    #     pattern = "^next_page.rds",
    #     full.names = TRUE,
    #     recursive = TRUE
    # ) |>
    #     gsub(
    #         pattern = paste0("^", pages_dir, "/pages_publication_year=", ""),
    #         replacement = ""
    #     ) |>
    #     gsub(
    #         pattern = "/next_page.rds$",
    #         replacement = ""
    #     )

    # completed <- processed[!(processed %in% interrupted)]

    # years <- years[!(years %in% completed)]
    #######
    #######

    # Years are shuffled so the parallel workers get a balanced mix of small
    # and large years; pbmclapply adds a progress bar over 8 forked workers.
    result <- pbmcapply::pbmclapply(
        sample(years),
        function(y) {
            message("\nGetting data for year ", y, " ...")
            output_path <- file.path(pages_dir, paste0(rds_dir, y))

            dir.create(
                path = output_path,
                showWarnings = FALSE,
                recursive = TRUE
            )

            # Restrict the downloaded fields to what this report needs;
            # IPBES.R::oa_request_IPBES executes the query and writes the
            # page files into output_path.
            data <- oa_query(
                title_and_abstract.search = compact(search),
                publication_year = y,
                options = list(
                    select = c("id", "doi", "authorships", "publication_year", "display_name", "abstract_inverted_index", "topics")
                ),
                verbose = FALSE
            ) |>
                IPBES.R::oa_request_IPBES(
                    count_only = FALSE,
                    output_path = output_path,
                    verbose = TRUE
                )
        },
        mc.cores = 8,
        mc.preschedule = FALSE
    ) |>
        unlist()

    invisible(result)
}

# Build one combined search string per R1 term: "(R1_i) AND (R2)".
sts <- sapply(
    R1_st,
    function(st) {
        compact(paste0("(", st, ") AND (", R2_st, ")"))
    }
)

# Download the corpus for every combined search term. The previous version
# hard-coded sts[[1]] and sts[[2]]; iterating over seq_along(sts) produces
# the identical result for two terms and extends automatically if further
# R1 terms are added. Each term gets its own directory prefix
# ("pages_<i>_publication_year=") so the page files do not collide.
for (i in seq_along(sts)) {
    get_corpus_pages(
        search = sts[[i]],
        pages_dir = file.path(".", "data", "pages"),
        rds_dir = paste0("pages_", i, "_publication_year=")
    )
}

toc()

The fields author and topics are serialized in the arrow database and need to be unserialized by using unserialize_arrow() on a dataset containing the two columns.

Show the code
tic()

# Source (paged .rds downloads) and target (arrow/parquet corpus) locations.
pages_dir <- file.path(".", "data", "pages")
arrow_dir <- file.path(".", "data", "corpus")


# Derive the set of publication years from the page directory names, which
# end in "...publication_year=<year>": split each full path on "=" and keep
# the part after it. NOTE(review): assumes exactly one "=" per path.
years <- list.dirs(
    path = pages_dir,
    full.names = TRUE,
    recursive = FALSE
) |>
    strsplit(
        split = "="
    ) |>
    sapply(
        FUN = function(x) {
            x[2]
        }
    ) |>
    unique() |>
    sort()

# Disabled resume logic: would drop years already present in the arrow
# dataset.
# years_done <- list.dirs(
#     path = arrow_dir,
#     full.names = TRUE,
#     recursive = FALSE
# )

# years <- years[
#     (
#         gsub(
#             x = years,
#             pattern = paste0("^", pages_dir, "/pages_publication_year="),
#             replacement = ""
#         ) # %in% gsub(
#         #     x = years_done,
#         #     pattern = paste0("^", arrow_dir, "/publication_year="),
#         #     replacement = ""
#         # )
#     )
# ]

# For every year (random order, progress bar): read all page files of that
# year, convert to a data frame, de-duplicate by id, cache the combined
# year as <year>.rds and append it to the arrow dataset.
result <- pbapply::pblapply(
    sample(years),
    function(year) {
        message("\n     Processing year ", year, " ...\n")
        # All page .rds files belonging to this year. The grep on `year` is
        # a plain substring match; NOTE(review): the <year>.rds cache files
        # written below also live in pages_dir and contain the year in
        # their name -- they are excluded only by the ".*publication.*"
        # filter; verify this holds on reruns.
        pages <- c(
            list.files(
                path = pages_dir,
                pattern = paste0("\\.rds$"),
                full.names = TRUE,
                recursive = TRUE
            ) |>
                grep(
                    pattern = year,
                    value = TRUE
                ) |>
                grep(
                    pattern = ".*publication.*",
                    value = TRUE
                )
        )
        data <- parallel::mclapply(
            pages,
            function(page) {
                # message("Processing ", page, " ...")
                # Each page file holds a raw OpenAlex response; pages with
                # an empty result set are dropped (NULL).
                p <- readRDS(file.path(page))$results
                if (length(p) == 0) {
                    p <- NULL
                } else {
                    p <- openalexR::works2df(p, verbose = FALSE)
                    p$author_abbr <- IPBES.R::abbreviate_authors(p)
                }
                return(p)
            },
            mc.cores = 1 # params$mc.cores
        ) |>
            do.call(what = dplyr::bind_rows) |>
            distinct(id, .keep_all = TRUE)

        # Cache the combined, de-duplicated year alongside the page files.
        saveRDS(
            data,
            file = file.path(pages_dir, paste0(year, ".rds"))
        )

        # List columns (author, topics) are serialized so they survive the
        # round trip through parquet (see unserialize_arrow() note above).
        data <- serialize_arrow(data)

        arrow::write_dataset(
            data,
            path = arrow_dir,
            partitioning = "publication_year",
            format = "parquet",
            existing_data_behavior = "overwrite"
        )
    }
)

# Build the subsidies-AND-TCA sub-corpus: keep only works whose ids are in
# ids_subs_tca.rds. NOTE(review): that file is only created further down
# in this report ("Identify Works ..."), so on a fresh run this block can
# only succeed after that section has been executed once.
arrow_dir <- file.path(".", "data", "corpus_tca")

read_corpus("data/corpus") |>
    dplyr::collect() |>
    dplyr::filter(id %in% readRDS("data/ids_subs_tca.rds")) |>
    arrow::write_dataset(
        path = arrow_dir,
        partitioning = "publication_year",
        format = "parquet",
        existing_data_behavior = "overwrite"
    )

toc()

Identify Works in Subsidies Corpus AND TCA corpus

Show the code
#|

# All work ids of the subsidies corpus as a character vector (unlist keeps
# the column-derived names; they are irrelevant downstream).
ids_subsidies <- unlist(
    collect(
        dplyr::select(
            read_corpus(file.path("data", "corpus")),
            id
        )
    )
)
Warning: Invalid metadata$r
Show the code
# All work ids of the TCA corpus (sibling repository) as a character vector.
ids_tca <- unlist(
    collect(
        dplyr::select(
            read_corpus(file.path("..", "IPBES_TCA_Corpus", "data", "corpus")),
            id
        )
    )
)
Warning: Invalid metadata$r
Show the code
# Works that occur in both corpora: subset the TCA ids by membership in the
# subsidies ids (is.element() is the functional form of %in%).
ids_subs_tca <- ids_tca[is.element(ids_tca, ids_subsidies)]

saveRDS(ids_subs_tca, file = file.path("data", "ids_subs_tca.rds"))

Export data for sentiment analysis

Show the code
#|

# Export id, publication_year and abstract (ab) for the external sentiment
# analysis, once for each corpus. write.table() with defaults writes
# space-separated, quoted text files.
write.table(
    dplyr::collect(dplyr::select(read_corpus("data/corpus"), id, publication_year, ab)),
    file = "sent_analysis_subsidies.txt"
)

write.table(
    dplyr::collect(dplyr::select(read_corpus("data/corpus_tca"), id, publication_year, ab)),
    file = "sent_analysis_subsidies_tca.txt"
)

Export 50 random papers for manual analysis

Show the code
#|

# Fixed seed so the drawn sample is reproducible across renderings.
set.seed(13)

# 50 random works from the subsidies corpus for manual review.
# NOTE(review): slice_sample() is applied before collect(), i.e. on the
# lazy arrow query -- confirm that read_corpus()/arrow supports sampling
# lazily; otherwise slice_sample() belongs after collect().
read_corpus("data/corpus") |>
    dplyr::select(id, doi, author_abbr, display_name, ab) |>
    dplyr::rename(abstract = ab, title = display_name) |>
    dplyr::slice_sample(n = 50) |>
    dplyr::collect() |>
    writexl::write_xlsx(path = "random_50_subsidies.xlsx")

set.seed(14)

# The same for the subsidies-in-TCA sub-corpus.
read_corpus("data/corpus_tca") |>
    dplyr::select(id, doi, author_abbr, display_name, ab) |>
    dplyr::rename(abstract = ab, title = display_name) |>
    dplyr::slice_sample(n = 50) |>
    dplyr::collect() |>
    writexl::write_xlsx(path = "random_50_subsidies_in_tca.xlsx")

Results

Number of hits

The number of hits are hits of the terms of the whole of the OpenAlex corpus. Due to methodological issues, the number of R1 AND R2 are overestimates and contain some double counting.

  • R1 in OpenAlex: 114,355 hits
  • R2 in OpenAlex: 34,410,842 hits
  • R1 AND R2: in OpenAlex 40,972 hits
  • R1 AND R2 in TCA corpus: 12,095 hits

Manual review 50 paper

The file contains the id, doi, author_abbr and abstract of the papers. Two samples were generated:

  • works in the subsidies corpus AND in the TCA corpus which can be downloaded here.

Sentiment analysis

Two csv files containing the id, publication_year and `ab` (abstract) were extracted:

  • works in the subsidies corpus AND in the TCA corpus which can be downloaded here.

Publications over time

The red line is the cumulative proportion of publications, the blue line the cumulative proportion of all of the OpenAlex corpus. Both use the secondary (red) axis.

Show the code
# Publications per year in the subsidies-AND-TCA corpus, joined with the
# per-year counts of the whole OpenAlex corpus for comparison.
data <- read_corpus(file.path("data", "corpus_tca")) |>
    dplyr::select(publication_year) |>
    dplyr::arrange(publication_year) |>
    dplyr::collect() |>
    table() |>
    as.data.frame() |>
    mutate(
        # table() turned the year into a factor; convert back to integer.
        publication_year = as.integer(as.character(publication_year)),
        # Yearly share and cumulative share of the corpus.
        p = Freq / sum(Freq),
        p_cum = cumsum(p)
    ) |>
    rename(
        count = Freq
    ) |>
    # Same quantities for all of OpenAlex (count_oa, p_oa, p_oa_cum) via a
    # group_by query; inner_join matches on the common publication_year.
    dplyr::inner_join(
        y = openalexR::oa_fetch(
            group_by = "publication_year",
            output = "tibble",
            verbose = FALSE
        ) |>
            dplyr::select(
                key,
                count
            ) |>
            dplyr::rename(
                publication_year = key,
                count_oa = count
            ) |>
            dplyr::arrange(publication_year) |>
            dplyr::mutate(
                publication_year = as.integer(as.character(publication_year)),
                p_oa = count_oa / sum(count_oa),
                p_oa_cum = cumsum(p_oa)
            )
    )
Warning: Invalid metadata$r
Show the code
# Bar chart of the yearly share of publications, with the cumulative shares
# of the corpus (red) and of all of OpenAlex (blue) on a secondary axis.
figure <- data |>
    dplyr::filter(publication_year >= 1900) |>
    ggplot() +
    geom_bar(aes(x = publication_year, y = p), stat = "identity") +
    # The cumulative curves are divided by 5 so they fit on the primary
    # axis; the secondary axis below multiplies by 5 to label them in
    # original units.
    geom_line(aes(x = publication_year, y = p_cum / 5), color = "red") +
    geom_line(aes(x = publication_year, y = p_oa_cum / 5), color = "blue") +
    scale_x_continuous(breaks = seq(1900, 2020, 10)) +
    scale_y_continuous(
        "Proportion of publications",
        sec.axis = sec_axis(~ . * 5, name = "Cumulative proportion") # multiply by 5 to undo the /5 scaling of the cumulative lines
    ) +
    labs(
        title = "Publications over time",
        x = "Year",
        # NOTE(review): the axis shows proportions, not counts; this label
        # is overridden by scale_y_continuous() above anyway.
        y = "Number of publications"
    ) +
    theme_minimal() +
    theme(axis.text.y.right = element_text(color = "red"))

# Save as PDF and PNG (12 x 6 in); `figure` binds to ggsave's plot argument.
ggplot2::ggsave(
    file.path("figures", "publications_over_time.pdf"),
    width = 12,
    height = 6,
    figure
)
ggplot2::ggsave(
    file.path("figures", "publications_over_time.png"),
    width = 12,
    height = 6,
    figure
)

rm(figure)

To download high resolution, click here

Show the code
# fn corrected from "publications_per_country" (copy-paste from the country
# section further down): this table shows publications over time.
data |> IPBES.R::table_dt(fn = "publications_over_time")
Show the code
rm(data)

Map first author affiliation

Distribution of the first author affiliation countries

The following calculations were done (count refers to the count of works per country in the subsidies corpus, count_oa to the count of works per country in the OpenAlex corpus):

  • **count** = ifelse(is.na(count), 0, count)
  • **log_count** = log(count + 1)
  • **log_count_oa** = log(count_oa + 1)
  • **p** = count / sum(count)
  • **p_oa** = count_oa / sum(count_oa)
  • **p_output** = count / count_oa
  • **p_diff** = (p_oa - p) * 100
Show the code
# Pull the (still serialized) author column of the TCA sub-corpus.
data <- collect(
    dplyr::select(
        read_corpus(file.path("data", "corpus_tca")),
        author
    )
)
Warning: Invalid metadata$r
Show the code
# First-author country per work: base64-decode and unserialize each author
# entry, take the first author's institution country code, then tabulate
# per country.
# NOTE(review): ifelse() with a scalar test returns only the first element
# of the chosen branch -- combined with a list-valued `yes` argument this
# is fragile; it appears to work here because purrr::map() over a single
# entry returns a length-1 list. Consider a plain if/else instead.
data <- sapply(
    data$author,
    function(x) {
        auth <- ifelse(
            is.na(x),
            NA,
            purrr::map(x, ~ .x |>
                base64enc::base64decode() |>
                unserialize())
        )
        # Works without author data are dropped (NULL vanishes in unlist()).
        if (is.na(auth)) {
            return(NULL)
        } else {
            return(auth[[1]]$institution_country_code[[1]])
        }
    }
) |>
    unlist() |>
    table() |>
    as.data.frame() |>
    rename(
        iso2c = Var1,
        count = Freq
    ) |>
    mutate(
        log_count = log(count + 1)
    )

# Translate the OpenAlex ISO2 country codes to ISO3 for the mapping helper.
data$iso3c <- countrycode::countrycode(
    sourcevar = data$iso2c,
    origin = "iso2c",
    destination = "iso3c"
)

# OpenAlex-wide work counts per author country, with the count column
# renamed so it can be joined next to the corpus counts.
data_oa <- rename(
    openalexR::oa_fetch(
        group_by = "authorships.countries",
        output = "tibble",
        verbose = FALSE
    ),
    count_oa = count
)
Show the code
# Map the country names reported by OpenAlex to ISO3 codes.
data_oa$iso3c <- countrycode::countrycode(
    sourcevar = data_oa$key_display_name,
    origin = "country.name",
    destination = "iso3c"
)
Warning: Some values were not matched unambiguously: Kosovo
Show the code
# Drop the key columns; iso3c (added above) is the join key from here on.
data_oa <- data_oa |>
    mutate(
        key_display_name = NULL,
        key = NULL
    )

# Combine corpus counts with the OpenAlex-wide counts (full join on the
# common iso3c column) and derive the quantities documented above.
data <- dplyr::full_join(
    data,
    data_oa
) |>
    mutate(
        # Countries present only in the OpenAlex data get a corpus count 0.
        count = ifelse(is.na(count), 0, count),
        log_count = log(count + 1),
        log_count_oa = log(count_oa + 1),
        p = count / sum(count),
        p_oa = count_oa / sum(count_oa),
        # NOTE(review): count_oa is not NA-filled, so p_oa, p_output and
        # p_diff become NA for countries missing from the OpenAlex counts.
        p_output = count / count_oa,
        p_diff = (p_oa - p) * 100
    )

# Five choropleth panels (2 columns) comparing overall OpenAlex output with
# the TCA corpus output per first-author country.
map <- patchwork::wrap_plots(
    # Panel 1: log counts, whole OpenAlex.
    data |> map_country_codes(
        map_type = "countries",
        values = "log_count_oa",
        geodata_path = gdm_dir
    ) +
        ggplot2::scale_fill_gradient2(low = "#E69F00", high = "#56B4E9") +
        ggplot2::ggtitle("log(count + 1) of overall publications (log_count_oa)"),
    # Panel 2: log counts, TCA corpus.
    data |> map_country_codes(
        map_type = "countries",
        values = "log_count",
        geodata_path = gdm_dir
    ) +
        ggplot2::scale_fill_gradient2(low = "#E69F00", high = "#56B4E9") +
        ggplot2::ggtitle("log(count + 1) of TCA publications (log_count)"),
    # Panel 3: proportional output, whole OpenAlex.
    data |> map_country_codes(
        map_type = "countries",
        values = "p_oa",
        geodata_path = gdm_dir
    ) +
        ggplot2::scale_fill_gradient2(low = "#E69F00", high = "#56B4E9") +
        ggplot2::ggtitle("Overall research output (p_oa)"),
    # Panel 4: proportional output, TCA corpus.
    data |> map_country_codes(
        map_type = "countries",
        values = "p",
        geodata_path = gdm_dir
    ) +
        ggplot2::scale_fill_gradient2(low = "#E69F00", high = "#56B4E9") +
        ggplot2::ggtitle("TCA research output (p)"),
    # Panel 5: difference, diverging scale centered at 0.
    data |>
        map_country_codes(
            map_type = "countries",
            values = "p_diff",
            geodata_path = gdm_dir
        ) +
        ggplot2::scale_fill_gradient2(low = "#E69F00", mid = "white", high = "#56B4E9", midpoint = 0) +
        ggplot2::ggtitle("difference (TCA - overall) output (p_oa - p)"),
    ncol = 2
)
Warning in map_country_codes(data, map_type = "countries", values = "log_count_oa", : The following countries are not in the world dataset: 
MCO, AIA, BLM, BMU, BVT, CXR, GGY, GIB, IOT, MHL, MDV, NFK, NRU, PCN, SMR, TKL, TUV, UMI, VAT, VGB, WLF, NA
and will therefore not be plotted!
Warning in map_country_codes(data, map_type = "countries", values = "log_count", : The following countries are not in the world dataset: 
MCO, AIA, BLM, BMU, BVT, CXR, GGY, GIB, IOT, MHL, MDV, NFK, NRU, PCN, SMR, TKL, TUV, UMI, VAT, VGB, WLF, NA
and will therefore not be plotted!
Warning in map_country_codes(data, map_type = "countries", values = "p_oa", : The following countries are not in the world dataset: 
MCO, AIA, BLM, BMU, BVT, CXR, GGY, GIB, IOT, MHL, MDV, NFK, NRU, PCN, SMR, TKL, TUV, UMI, VAT, VGB, WLF, NA
and will therefore not be plotted!
Warning in map_country_codes(data, map_type = "countries", values = "p", : The following countries are not in the world dataset: 
MCO, AIA, BLM, BMU, BVT, CXR, GGY, GIB, IOT, MHL, MDV, NFK, NRU, PCN, SMR, TKL, TUV, UMI, VAT, VGB, WLF, NA
and will therefore not be plotted!
Warning in map_country_codes(data, map_type = "countries", values = "p_diff", : The following countries are not in the world dataset: 
MCO, AIA, BLM, BMU, BVT, CXR, GGY, GIB, IOT, MHL, MDV, NFK, NRU, PCN, SMR, TKL, TUV, UMI, VAT, VGB, WLF, NA
and will therefore not be plotted!
Show the code
# Save the five-panel country map as PDF and PNG (12 x 8 in).
for (ext in c("pdf", "png")) {
    ggplot2::ggsave(
        file.path("maps", paste0("publications_countries.", ext)),
        map,
        width = 12,
        height = 8
    )
}

rm(map)

To download high resolution, click here

Show the code
data |> IPBES.R::table_dt(fn = "publications_per_country")
Show the code
rm(data)

Sentiment Analysis

For analyzing the sentiments of the provided abstracts, we have used the Python NLTK package, and VADER (Valence Aware Dictionary for Sentiment Reasoning) which is an NLTK module that provides sentiment scores based on the words used. VADER is a pre-trained, rule-based sentiment analysis model in which the terms are generally labeled as per their semantic orientation as either positive or negative.

The main advantage/reason for using this model was that it doesn’t require a labeled training dataset. The output of the model is 4 statistical scores:

  • compound: composite score that summarizes the overall sentiment of the text, where scores close to 1 indicate a positive sentiment, scores close to -1 indicate a negative sentiment, and scores close to 0 indicate a neutral sentiment
  • negative: percentage of negative sentiments in the text
  • neutral: percentage of neutral sentiments in the text
  • positive: percentage of positive sentiments in the text
Show the code
readRDS("input/SentAnalysis_Scores.rds") |>
    IPBES.R::table_dt(fn = "sentiment_scores", fixedColumns = list(leftColumns = 2))
Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html

Sentiments Over Time

This graph shows the sentiment scores of the sentiment analysis over time.

Show the code
# Mean sentiment scores per publication year, keeping only years with more
# than 10 scored abstracts (to avoid noisy means from tiny samples).
data <- readRDS("input/SentAnalysis_Scores.rds") |>
    dplyr::group_by(year) |>
    dplyr::summarize(
        neg = mean(neg),
        neu = mean(neu),
        pos = mean(pos),
        compound = mean(compound),
        n = n()
    ) |>
    dplyr::filter(
        n > 10
    )

# One line per score type. With ggplot's default alphabetical level order
# the manual colors map as: compound = black, neg = red, neu = blue,
# pos = green.
figure <- data |>
    tidyr::pivot_longer(cols = c(neg, neu, pos, compound), names_to = "type", values_to = "value") |>
    ggplot2::ggplot() +
    ggplot2::geom_line(aes(x = year, y = value, color = type, linetype = type)) +
    ggplot2::scale_color_manual(values = c("black", "red", "blue", "green")) +
    ggplot2::labs(
        title = "Sentiment Analysis Scores (n > 10)",
        x = "Year",
        y = "Score",
        color = "Type",
        linetype = "Type"
    ) +
    ggplot2::theme_minimal()


# Save the sentiment time-series figure as PDF and PNG (12 x 6 in).
for (ext in c("pdf", "png")) {
    ggplot2::ggsave(
        file.path("figures", paste0("sentiments_over_time.", ext)),
        figure,
        width = 12,
        height = 6
    )
}

rm(figure)

To download high resolution, click here

Negative Sentiment

Over Time

This graph shows the negative score of the sentiment analysis over time. Only years with more than 10 scored works are shown.

Show the code
# Mean negative score per year, restricted to years with more than 10
# scored works.
data <- dplyr::filter(
    dplyr::summarize(
        dplyr::group_by(readRDS("input/SentAnalysis_Scores.rds"), year),
        neg = mean(neg),
        n = n()
    ),
    n > 10
)

# Line chart of the yearly mean negative score.
figure <- ggplot2::ggplot(data, ggplot2::aes(x = year, y = neg)) +
    ggplot2::geom_line() +
    ggplot2::labs(
        title = "Sentiment Analysis negative Score (n > 10)",
        x = "Year",
        y = "Negative score"
    ) +
    ggplot2::theme_minimal()

# Save the negative-score figure as PDF and PNG (12 x 6 in).
for (ext in c("pdf", "png")) {
    ggplot2::ggsave(
        file.path("figures", paste0("sentiments_neg_over_time.", ext)),
        figure,
        width = 12,
        height = 6
    )
}

rm(figure)

To download high resolution, click here

Show the code
data |> IPBES.R::table_dt(fn = "sentiments_neg_over_time")
Show the code
rm(data)
Per country
Show the code
# Mean negative score per first-author country: decode the serialized
# author column, extract the first author's country, join the sentiment
# scores by work id and aggregate per country.
data <- read_corpus(file.path("data", "corpus_tca")) |>
    dplyr::select(id, author) |>
    collect() |>
    mutate(
        author = IPBES.R::unserialize_arrow(author),
        country_first_author = extract_authors(author)
    ) |>
    dplyr::left_join(
        readRDS("input/SentAnalysis_Scores.rds"),
        by = "id"
    ) |>
    group_by(country_first_author) |>
    summarize(
        # NOTE(review): works without a sentiment score (left_join -> NA)
        # make the whole group mean NA; confirm whether na.rm = TRUE is
        # wanted here.
        mean_neg = mean(neg),
        n = n()
    ) |>
    arrange(desc(mean_neg)) |>
    rename(
        iso2c = country_first_author
    ) |>
    mutate(
        # ISO2 -> ISO3 for the mapping helper.
        iso3c = countrycode::countrycode(
            iso2c,
            origin = "iso2c",
            destination = "iso3c"
        )
    )
Warning: Invalid metadata$r
Show the code
# Choropleth of the mean negative sentiment for all countries.
map <- map_country_codes(
    data,
    map_type = "countries",
    values = "mean_neg",
    geodata_path = gdm_dir
) +
    ggplot2::scale_fill_gradient2(low = "#E69F00", mid = "white", high = "#56B4E9", midpoint = 0) +
    ggplot2::ggtitle("Mean negative sentiment (0 - 1) - all countries")
Warning in map_country_codes(data, map_type = "countries", values = "mean_neg", : The following countries are not in the world dataset: 
MCO, NA
and will therefore not be plotted!
Show the code
# The same map, restricted to countries with more than 10 works.
map_sel <- map_country_codes(
    dplyr::filter(data, n > 10),
    map_type = "countries",
    values = "mean_neg",
    geodata_path = gdm_dir
) +
    ggplot2::scale_fill_gradient2(low = "#E69F00", mid = "white", high = "#56B4E9", midpoint = 0) +
    ggplot2::ggtitle("Mean negative sentiment (0 - 1) - more than 10 works")
Warning in map_country_codes(dplyr::filter(data, n > 10), map_type = "countries", : The following countries are not in the world dataset: 
NA
and will therefore not be plotted!
Show the code
ggplot2::ggsave(
    file.path("maps", "sentiment_neg_per_countries_all.pdf"),
    width = 12,
    height = 8,
    map
)
ggplot2::ggsave(
    file.path("maps", "sentiment_neg_per_countries_all.png"),
    width = 12,
    height = 8,
    map
)

ggplot2::ggsave(
    file.path("maps", "sentiment_neg_per_countries_10.pdf"),
    width = 12,
    height = 8,
    map_sel
)
ggplot2::ggsave(
    file.path("maps", "sentiment_neg_per_countries_10.png"),
    width = 12,
    height = 8,
    map_sel
)

rm(map)

To download high resolution, click here

To download high resolution, click here

In the table, all countries are included

Show the code
data |> IPBES.R::table_dt(fn = "sentiment_neg_per_countries")
Show the code
rm(data)

Neutral Sentiment

Over Time

This graph shows the neutral score of the sentiment analysis over time. Only years with more than 10 scored works are shown.

Show the code
data <- readRDS("input/SentAnalysis_Scores.rds") |>
    dplyr::group_by(year) |>
    dplyr::summarize(
        neu = mean(neu),
        n = n()
    ) |>
    dplyr::filter(
        n > 10
    )

figure <- data |>
    ggplot2::ggplot() +
    ggplot2::geom_line(ggplot2::aes(x = year, y = neu)) +
    ggplot2::labs(
        title = "Sentiment Analysis neutral Score (n > 10)",
        x = "Year",
        y = "Neutral score"
    ) +
    ggplot2::theme_minimal()

ggplot2::ggsave(
    file.path("figures", "sentiments_neu_over_time.pdf"),
    width = 12,
    height = 6,
    figure
)
ggplot2::ggsave(
    file.path("figures", "sentiments_neu_over_time.png"),
    width = 12,
    height = 6,
    figure
)

rm(figure)

To download high resolution, click here

Show the code
data |> IPBES.R::table_dt(fn = "sentiments_neu_over_time")
Show the code
rm(data)
Per country
Show the code
data <- read_corpus(file.path("data", "corpus_tca")) |>
    dplyr::select(id, author) |>
    collect() |>
    mutate(
        author = IPBES.R::unserialize_arrow(author),
        country_first_author = extract_authors(author)
    ) |>
    dplyr::left_join(
        readRDS("input/SentAnalysis_Scores.rds"),
        by = "id"
    ) |>
    group_by(country_first_author) |>
    summarize(
        mean_neu = mean(neu),
        n = n()
    ) |>
    arrange(desc(mean_neu)) |>
    rename(
        iso2c = country_first_author
    ) |>
    mutate(
        iso3c = countrycode::countrycode(
            iso2c,
            origin = "iso2c",
            destination = "iso3c"
        )
    )
Warning: Invalid metadata$r
Show the code
map <- data |>
    map_country_codes(
        map_type = "countries",
        values = "mean_neu",
        geodata_path = gdm_dir
    ) +
    ggplot2::scale_fill_gradient2(low = "#E69F00", mid = "white", high = "#56B4E9", midpoint = 0) +
    ggplot2::ggtitle("Mean neutral sentiment (0 - 1) - all countries")
Warning in map_country_codes(data, map_type = "countries", values = "mean_neu", : The following countries are not in the world dataset: 
MCO, NA
and will therefore not be plotted!
Show the code
map_sel <- data |>
    dplyr::filter(n > 10) |>
    map_country_codes(
        map_type = "countries",
        values = "mean_neu",
        geodata_path = gdm_dir
    ) +
    ggplot2::scale_fill_gradient2(low = "#E69F00", mid = "white", high = "#56B4E9", midpoint = 0) +
    ggplot2::ggtitle("Mean neutral sentiment (0 - 1) - more than 10 works")
Warning in map_country_codes(dplyr::filter(data, n > 10), map_type = "countries", : The following countries are not in the world dataset: 
NA
and will therefore not be plotted!
Show the code
ggplot2::ggsave(
    file.path("maps", "sentiment_neu_per_countries_all.pdf"),
    width = 12,
    height = 8,
    map
)
ggplot2::ggsave(
    file.path("maps", "sentiment_neu_per_countries_all.png"),
    width = 12,
    height = 8,
    map
)

ggplot2::ggsave(
    file.path("maps", "sentiment_neu_per_countries_10.pdf"),
    width = 12,
    height = 8,
    map_sel
)
ggplot2::ggsave(
    file.path("maps", "sentiment_neu_per_countries_10.png"),
    width = 12,
    height = 8,
    map_sel
)

rm(map)

To download high resolution, click here

To download high resolution, click here

In the table, all countries are included

Show the code
data |> IPBES.R::table_dt(fn = "sentiment_neu_per_countries")
Show the code
rm(data)

Positive Sentiment

Over Time

This graph shows the positive score of the sentiment analysis over time. Only years with more than 10 scored works are shown.

Show the code
data <- readRDS("input/SentAnalysis_Scores.rds") |>
    dplyr::group_by(year) |>
    dplyr::summarize(
        pos = mean(pos),
        n = n()
    ) |>
    dplyr::filter(
        n > 10
    )

figure <- data |>
    ggplot2::ggplot() +
    ggplot2::geom_line(ggplot2::aes(x = year, y = pos)) +
    ggplot2::labs(
        title = "Sentiment Analysis positive Score (n > 10)",
        x = "Year",
        y = "Positive score"
    ) +
    ggplot2::theme_minimal()

ggplot2::ggsave(
    file.path("figures", "sentiments_pos_over_time.pdf"),
    width = 12,
    height = 6,
    figure
)
ggplot2::ggsave(
    file.path("figures", "sentiments_pos_over_time.png"),
    width = 12,
    height = 6,
    figure
)

rm(figure)

To download high resolution, click here

Show the code
data |> IPBES.R::table_dt(fn = "sentiments_pos_over_time")
Show the code
rm(data)
Per country
Show the code
data <- read_corpus(file.path("data", "corpus_tca")) |>
    dplyr::select(id, author) |>
    collect() |>
    mutate(
        author = IPBES.R::unserialize_arrow(author),
        country_first_author = extract_authors(author)
    ) |>
    dplyr::left_join(
        readRDS("input/SentAnalysis_Scores.rds"),
        by = "id"
    ) |>
    group_by(country_first_author) |>
    summarize(
        mean_pos = mean(pos),
        n = n()
    ) |>
    arrange(desc(mean_pos)) |>
    rename(
        iso2c = country_first_author
    ) |>
    mutate(
        iso3c = countrycode::countrycode(
            iso2c,
            origin = "iso2c",
            destination = "iso3c"
        )
    )
Warning: Invalid metadata$r
Show the code
map <- data |>
    map_country_codes(
        map_type = "countries",
        values = "mean_pos",
        geodata_path = gdm_dir
    ) +
    ggplot2::scale_fill_gradient2(low = "#E69F00", mid = "white", high = "#56B4E9", midpoint = 0) +
    ggplot2::ggtitle("Mean positive sentiment (0 - 1) - all countries")
Warning in map_country_codes(data, map_type = "countries", values = "mean_pos", : The following countries are not in the world dataset: 
MCO, NA
and will therefore not be plotted!
Show the code
map_sel <- data |>
    dplyr::filter(n > 10) |>
    map_country_codes(
        map_type = "countries",
        values = "mean_pos",
        geodata_path = gdm_dir
    ) +
    ggplot2::scale_fill_gradient2(low = "#E69F00", mid = "white", high = "#56B4E9", midpoint = 0) +
    ggplot2::ggtitle("Mean positive sentiment (0 - 1) - more than 10 works")
Warning in map_country_codes(dplyr::filter(data, n > 10), map_type = "countries", : The following countries are not in the world dataset: 
NA
and will therefore not be plotted!
Show the code
ggplot2::ggsave(
    file.path("maps", "sentiment_pos_per_countries_all.pdf"),
    width = 12,
    height = 8,
    map
)
ggplot2::ggsave(
    file.path("maps", "sentiment_pos_per_countries_all.png"),
    width = 12,
    height = 8,
    map
)

ggplot2::ggsave(
    file.path("maps", "sentiment_pos_per_countries_10.pdf"),
    width = 12,
    height = 8,
    map_sel
)
ggplot2::ggsave(
    file.path("maps", "sentiment_pos_per_countries_10.png"),
    width = 12,
    height = 8,
    map_sel
)

rm(map)

To download high resolution, click here

To download high resolution, click here

In the table, all countries are included

Show the code
# fn corrected from "sentiment_neu_per_countries" (copy-paste from the
# neutral-sentiment section): this is the positive-sentiment table.
data |> IPBES.R::table_dt(fn = "sentiment_pos_per_countries")
Show the code
rm(data)

Compound Sentiment

Over Time

This graph shows the compound score of the sentiment analysis over time. Only years with more than 10 scored works are shown.

Show the code
data <- readRDS("input/SentAnalysis_Scores.rds") |>
    dplyr::group_by(year) |>
    dplyr::summarize(
        compound = mean(compound),
        n = n()
    ) |>
    dplyr::filter(
        n > 10
    )

figure <- data |>
    ggplot2::ggplot() +
    ggplot2::geom_line(ggplot2::aes(x = year, y = compound)) +
    ggplot2::labs(
        title = "Sentiment Analysis Compound Score (n > 10)",
        x = "Year",
        y = "Compound score"
    ) +
    ggplot2::theme_minimal()

ggplot2::ggsave(
    file.path("figures", "sentiments_comp_over_time.pdf"),
    width = 12,
    height = 6,
    figure
)
ggplot2::ggsave(
    file.path("figures", "sentiments_comp_over_time.png"),
    width = 12,
    height = 6,
    figure
)

rm(figure)

To download high resolution, click here

Show the code
data |> IPBES.R::table_dt(fn = "sentiments_comp_over_time")
Show the code
rm(data)
Per country
Show the code
data <- read_corpus(file.path("data", "corpus_tca")) |>
    dplyr::select(id, author) |>
    collect() |>
    mutate(
        author = IPBES.R::unserialize_arrow(author),
        country_first_author = extract_authors(author)
    ) |>
    dplyr::left_join(
        readRDS("input/SentAnalysis_Scores.rds"),
        by = "id"
    ) |>
    group_by(country_first_author) |>
    summarize(
        mean_compound = mean(compound),
        n = n()
    ) |>
    arrange(desc(mean_compound)) |>
    rename(
        iso2c = country_first_author
    ) |>
    mutate(
        iso3c = countrycode::countrycode(
            iso2c,
            origin = "iso2c",
            destination = "iso3c"
        )
    )
Warning: Invalid metadata$r
Show the code
map <- data |>
    map_country_codes(
        map_type = "countries",
        values = "mean_compound",
        geodata_path = gdm_dir
    ) +
    ggplot2::scale_fill_gradient2(low = "#E69F00", mid = "white", high = "#56B4E9", midpoint = 0) +
    ggplot2::ggtitle("Mean compound sentiment (-1: negative; 1: positive) - all countries")
Warning in map_country_codes(data, map_type = "countries", values = "mean_compound", : The following countries are not in the world dataset: 
MCO, NA
and will therefore not be plotted!
Show the code
map_sel <- data |>
    dplyr::filter(n > 10) |>
    map_country_codes(
        map_type = "countries",
        values = "mean_compound",
        geodata_path = gdm_dir
    ) +
    ggplot2::scale_fill_gradient2(low = "#E69F00", mid = "white", high = "#56B4E9", midpoint = 0) +
    ggplot2::ggtitle("Mean compound sentiment (-1: negative; 1: positive) - more than 10 works")
Warning in map_country_codes(dplyr::filter(data, n > 10), map_type = "countries", : The following countries are not in the world dataset: 
NA
and will therefore not be plotted!
Show the code
ggplot2::ggsave(
    file.path("maps", "sentiment_comp_per_countries_all.pdf"),
    width = 12,
    height = 8,
    map
)
ggplot2::ggsave(
    file.path("maps", "sentiment_comp_per_countries_all.png"),
    width = 12,
    height = 8,
    map
)

ggplot2::ggsave(
    file.path("maps", "sentiment_comp_per_countries_10.pdf"),
    width = 12,
    height = 8,
    map_sel
)
ggplot2::ggsave(
    file.path("maps", "sentiment_comp_per_countries_10.png"),
    width = 12,
    height = 8,
    map_sel
)

rm(map)

To download high resolution, click here

To download high resolution, click here

In the table, all countries are included

Show the code
data |> IPBES.R::table_dt(fn = "sentiment_comp_per_countries")
Show the code
rm(data)

Reuse

Citation

BibTeX citation:
@report{krug,
  author = {Krug, Rainer M.},
  title = {Report {Assessment} {Ch5} {Subsidies} {Reform}},
  doi = {XXXXXX},
  langid = {en},
  abstract = {A short description what this is about. This is not a
    tracditional abstract, but rather something else ...}
}
For attribution, please cite this work as:
Krug, Rainer M. n.d. “Report Assessment Ch5 Subsidies Reform.” IPBES Data Management Report. https://doi.org/XXXXXX.